Joyce Robbins
Basic faceting in ggplot2
When to free scales
Getting data in the right form
Plotting one variable per panel
Design decisions with multiple variables
Scatterplot matrices
Slides: www.github.com/jtr13/PanelPlots/NYCASAMetro.pdf
Code: www.github.com/jtr13/PanelPlots/NYCASAMetro.Rmd
Each panel represents one categorical group / levels of a factor (type can be factor or character or integer)
## Observations: 150
## Variables: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, set…
facet_grid()
Note that the y-axis is discrete
# Dot plot of fuel economy: one row per car model, ordered by mpg.
# rownames_to_column() moves the car names out of the row names so they can
# be mapped to the (discrete) y-axis.
g <- mtcars %>%
  rownames_to_column("car") %>%
  ggplot(aes(mpg, reorder(car, mpg))) +
  geom_point(color = "blue") +
  theme_bw() +
  # Blank out the x-direction grid lines; the per-car guides are kept.
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank()
  ) +
  ylab("")
g
# scales = "free_y"
# space = "free_y"
# incorrect
correct
One column of categorical data = one faceting direction
## Classes 'tbl_df', 'tbl' and 'data.frame': 58788 obs. of 24 variables:
## $ title : chr "$" "$1000 a Touchdown" "$21 a Day Once a Month" "$40,000" ...
## $ year : int 1971 1939 1941 1996 1975 2000 2002 2002 1987 1917 ...
## $ length : int 121 71 7 70 71 91 93 25 97 61 ...
## $ budget : int NA NA NA NA NA NA NA NA NA NA ...
## $ rating : num 6.4 6 8.2 8.2 3.4 4.3 5.3 6.7 6.6 6 ...
## $ votes : int 348 20 5 6 17 45 200 24 18 51 ...
## $ r1 : num 4.5 0 0 14.5 24.5 4.5 4.5 4.5 4.5 4.5 ...
## $ r2 : num 4.5 14.5 0 0 4.5 4.5 0 4.5 4.5 0 ...
## $ r3 : num 4.5 4.5 0 0 0 4.5 4.5 4.5 4.5 4.5 ...
## $ r4 : num 4.5 24.5 0 0 14.5 14.5 4.5 4.5 0 4.5 ...
## $ r5 : num 14.5 14.5 0 0 14.5 14.5 24.5 4.5 0 4.5 ...
## $ r6 : num 24.5 14.5 24.5 0 4.5 14.5 24.5 14.5 0 44.5 ...
## $ r7 : num 24.5 14.5 0 0 0 4.5 14.5 14.5 34.5 14.5 ...
## $ r8 : num 14.5 4.5 44.5 0 0 4.5 4.5 14.5 14.5 4.5 ...
## $ r9 : num 4.5 4.5 24.5 34.5 0 14.5 4.5 4.5 4.5 4.5 ...
## $ r10 : num 4.5 14.5 24.5 45.5 24.5 14.5 14.5 14.5 24.5 4.5 ...
## $ mpaa : chr "" "" "" "" ...
## $ Action : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Animation : int 0 0 1 0 0 0 0 0 0 0 ...
## $ Comedy : int 1 1 0 1 0 0 0 0 0 0 ...
## $ Drama : int 1 0 0 0 0 1 1 0 1 0 ...
## $ Documentary: int 0 0 0 0 0 0 0 1 0 0 ...
## $ Romance : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Short : int 0 0 1 0 0 0 0 1 0 0 ...
# Action column
# Budget vs. length for a random sample of 1000 movies with a known budget,
# one panel per value of the 0/1 Action indicator.
movies %>%
  filter(!is.na(budget)) %>%
  sample_n(1000) %>%
  ggplot(aes(budget, length)) +
  geom_point() +
  facet_wrap(~Action)

# Collapse the seven genre indicator columns (Action through Short) into a
# single "genre" column so that genre can serve as one faceting variable.
# Keeping only value == 1 leaves one row per (movie, genre) pair, so a movie
# flagged with several genres appears once for each of them.
mymovies <- movies %>%
  select(title, length, budget, year, mpaa, Action:Short) %>%
  gather(key = "genre", value, Action:Short) %>%
  filter(value == 1) %>%
  select(-value)
head(mymovies)
## # A tibble: 6 x 6
## title length budget year mpaa genre
## <chr> <int> <int> <int> <chr> <chr>
## 1 $windle 93 NA 2002 R Action
## 2 'A' gai waak 106 NA 1983 PG-13 Action
## 3 'A' gai waak juk jaap 101 NA 1987 PG-13 Action
## 4 'Crocodile' Dundee II 110 NA 1988 "" Action
## 5 'Gator Bait 88 NA 1974 "" Action
## 6 'Sheba, Baby' 90 NA 1975 "" Action
# genre
# Mean length per genre, sorted from longest to shortest. The object must be
# named lengthorder: the plot that follows reads lengthorder$genre for the
# panel order and uses lengthorder as the data for the mean-length lines.
lengthorder <- mymovies %>%
  group_by(genre) %>%
  summarize(meanlength = mean(length)) %>%
  arrange(desc(meanlength))
# Length vs. budget for a random sample of 1000 (movie, genre) rows with a
# known budget, one panel per genre. Panels follow the genre order stored in
# lengthorder, and each panel gets a red line at that genre's mean length.
mymovies %>%
  filter(!is.na(budget)) %>%
  sample_n(1000) %>%
  ggplot(aes(budget / 1000000, length)) +
  geom_point() +
  facet_wrap(~factor(genre, levels = lengthorder$genre)) +
  xlab("budget (in millions)") +
  geom_hline(data = lengthorder, aes(yintercept = meanlength), color = "red") +
  ggtitle("Length vs. Budget", subtitle = "Red line indicates mean length")

# Stack year, length, budget, rating and votes into (variable, value) pairs,
# keeping mpaa alongside each pair, then drop the missing values.
newmovies <- movies %>%
  select(mpaa, year:votes) %>%
  gather(key = "variable", value, -mpaa) %>%
  filter(!is.na(value))
newmovies %>% sample_n(6)
## # A tibble: 6 x 3
## mpaa variable value
## <chr> <chr> <dbl>
## 1 R budget 20000000
## 2 "" year 1985
## 3 "" length 71
## 4 "" year 2004
## 5 "" votes 19
## 6 "" rating 6.6
variables become levels of the new “variable” column
mpaa and variable
Same technique: variables become levels of a new “variable” column
# Yearly mean of each numeric variable in long form: one row per
# (year, variable) pair. Budget is rescaled to millions before reshaping.
byyear <- movies %>%
  select(title:votes) %>%
  mutate(budget_millions = budget / 1000000) %>%
  select(-budget) %>%
  gather(key = "variable", value, -title, -year) %>%
  group_by(year, variable) %>%
  # Spell out TRUE: T is an ordinary variable and can be reassigned.
  # Groups in which every value is missing come out as NaN.
  summarize(mean = mean(value, na.rm = TRUE))
## Observations: 452
## Variables: 3
## Groups: year [113]
## $ year <int> 1893, 1893, 1893, 1893, 1894, 1894, 1894, 1894, 1895, 1…
## $ variable <chr> "budget_millions", "length", "rating", "votes", "budget…
## $ mean <dbl> NaN, 1.000000, 7.000000, 90.000000, NaN, 1.000000, 4.88…
# Keep movies of at most 180 minutes and label each one with its decade.
# floor() assigns every year to the decade it falls in (1995 -> 1990).
# round() would instead centre the bins on the decade boundary and, because
# R rounds halves to even, would split years ending in 5 inconsistently
# (1985 -> 1980 but 1995 -> 2000), giving bins of unequal width.
mymovies <- mymovies %>%
  filter(length <= 180) %>%
  mutate(decade = factor(floor(year / 10) * 10))
# Histogram of movie length, one panel per decade.
ggplot(mymovies, aes(length)) +
  geom_histogram(fill = "cornflowerblue") +
  facet_wrap(~decade)

library(ggridges)
# The same comparison drawn as ridgeline density curves, one ridge per
# decade; fct_rev() reverses the order of the decade levels on the y-axis.
ggplot(mymovies, aes(x = length, y = fct_rev(decade))) +
  geom_density_ridges(scale = 1.5, color = "blue", fill = "blue", alpha = .4) +
  xlab("Length (in minutes)") +
  ylab("") +
  theme_ridges()
# x-axis, y-axis, row facets, column facets
color, size, shape
## Observations: 56,702
## Variables: 5
## $ `Order method type` <chr> "Telephone", "Telephone", "Telephone", "Tele…
## $ `Retailer type` <chr> "Department", "Department", "Department", "D…
## $ `Product line` <chr> "Camping", "Camping", "Camping", "Camping", …
## $ Revenue <dbl> 0.01809251, 0.08225408, 0.02143473, 0.070400…
## $ Date <date> 2012-03-30, 2012-03-30, 2012-03-30, 2012-03…
# Total revenue per date, all categories combined.
sales %>%
  group_by(Date) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev)) +
  geom_line() +
  ylab("millions $") +
  ggtitle("Revenue")

# Revenue per date with one colored line per product line.
# fct_reorder2() reorders the color levels using Date and SumRev
# (NOTE(review): by default this should make the legend order follow the
# lines at the latest date -- confirm against the forcats documentation).
sales %>%
  group_by(Date, `Product line`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev,
             color = fct_reorder2(`Product line`, Date, SumRev))) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  ylab("millions $") +
  ggtitle("Revenue by Product line") +
  # The color mapping is an expression, so give the legend a readable title.
  labs(color = "Product line")

# Same plot, split by order method type.
sales %>%
  group_by(Date, `Order method type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev,
             color = fct_reorder2(`Order method type`, Date, SumRev))) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  ylab("millions $") +
  ggtitle("Revenue by Order method type") +
  labs(color = "Order method type")

# Same plot, split by retailer type.
sales %>%
  group_by(Date, `Retailer type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev,
             color = fct_reorder2(`Retailer type`, Date, SumRev))) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  ylab("millions $") +
  ggtitle("Revenue by Retailer type") +
  labs(color = "Retailer type")
# Reorder factor levels
# Two categorical variables: order method type as color, product line as
# wrapped panels.
sales %>%
  group_by(Date, `Product line`, `Order method type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev, color = `Order method type`)) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  facet_wrap(~`Product line`) +
  ylab("millions $") +
  theme(legend.position = "bottom") +
  ggtitle("Revenue, faceted on product line")

# Three categorical variables, arrangement 1: product line as color,
# retailer type in rows, order method type in columns.
sales %>%
  group_by(Date, `Product line`, `Retailer type`, `Order method type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev, color = `Product line`)) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  facet_grid(`Retailer type` ~ `Order method type`) +
  theme_bw() +
  theme(legend.position = "bottom") +
  ggtitle("Revenue, faceted on Order method and Retailer type")

# Arrangement 2: retailer type as color, product line in rows, order method
# type in columns.
sales %>%
  group_by(Date, `Product line`, `Retailer type`, `Order method type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev, color = `Retailer type`)) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  facet_grid(`Product line` ~ `Order method type`) +
  theme_bw() +
  theme(legend.position = "bottom") +
  ggtitle("Revenue, faceted on Order method and Product line")

# Arrangement 3: order method type as color, product line in rows, retailer
# type in columns.
sales %>%
  group_by(Date, `Product line`, `Retailer type`, `Order method type`) %>%
  summarize(SumRev = sum(Revenue)) %>%
  ggplot(aes(Date, SumRev, color = `Order method type`)) +
  geom_line() +
  scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
  facet_grid(`Product line` ~ `Retailer type`) +
  theme_bw() +
  theme(legend.position = "bottom") +
  ggtitle("Revenue, faceted on Retailer type and Product line")
# Can’t create with faceting in ggplot2
Options:
plot()
lattice::splom()
GGally::ggpairs()